import requests
import openpyxl
import pandas as pd
from stylecloud import gen_stylecloud
import jieba

"""
作者：李运辰

公众号：Python研究者

"""

### 采集数据
def get_data(page_count=125, output_path="新闻数据-李运辰.xls", timeout=10):
    """Download news entries from the Sina feed roll API into a spreadsheet.

    Pages 1..page_count are requested one after another (20 entries are
    requested per page). For every entry the title, intro and keywords
    are written to one row below a header row, and the workbook is saved
    once all pages have been processed.

    Args:
        page_count: Number of result pages to request, starting at page 1.
        output_path: Path of the workbook to write.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.RequestException: If a request fails, times out or
            returns an HTTP error status. Nothing is saved in that case.
    """
    outwb = openpyxl.Workbook()
    # index=0 puts the new sheet first, so it is the one read back by default.
    outws = outwb.create_sheet(index=0)
    for column, header in enumerate(("标题", "原标题", "关键词"), start=1):
        outws.cell(row=1, column=column, value=header)

    url = "https://feed.sina.com.cn/api/roll/get"
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36',
    }

    row = 2  # Row 1 holds the header, so data starts on row 2.
    for page in range(1, page_count + 1):
        params = {
            "pageid": "121",
            "lid": "1356",
            "num": "20",
            "versionNumber": "1.2.4",
            "page": str(page),
            "encode": "utf-8",
        }
        # A timeout keeps one stalled connection from hanging the whole crawl.
        response = requests.get(url, headers=headers, params=params, timeout=timeout)
        # Fail loudly on an HTTP error instead of trying to parse an error page.
        response.raise_for_status()
        articles = response.json()['result']['data']
        for article in articles:
            # .get() leaves the cell empty when a field is absent, rather
            # than aborting the crawl before anything has been saved.
            outws.cell(row=row, column=1, value=article.get('title'))
            outws.cell(row=row, column=2, value=article.get('intro'))
            outws.cell(row=row, column=3, value=article.get('keywords'))
            # Data row 2 is record 1, so the record number is row - 1.
            print("采集第" + str(row - 1) + "条")
            row += 1

    # NOTE: openpyxl writes the xlsx (OOXML) format whatever the extension
    # is; the ".xls" default name is kept because drawpic() reads that name.
    outwb.save(output_path)

### 绘制词云图
def _column_texts(column):
    """Return the non-empty cells of a pandas column as a list of strings."""
    # Empty spreadsheet cells come back as float NaN, which str.join rejects.
    return column.dropna().astype(str).tolist()


def _segment(texts):
    """Segment each text with jieba and return all tokens joined by spaces."""
    words = []
    for text in texts:
        # Cutting text by text keeps the end of one entry from being fused
        # with the start of the next one into a bogus token.
        words.extend(jieba.cut(text))
    return " ".join(words)


def _render_cloud(text, icon_name, output_name):
    """Render text as a white-background word cloud shaped like icon_name."""
    # The original author notes that a Chinese font must be supplied,
    # otherwise the output is not rendered correctly.
    gen_stylecloud(text=text, icon_name=icon_name, font_path='simsun.ttc',
                   background_color="white", output_name=output_name)


def drawpic(datafile='新闻数据-李运辰.xls'):
    """Draw one word cloud per column of the collected news spreadsheet.

    Reads the "标题", "原标题" and "关键词" columns of datafile and writes
    '标题.png', '原标题.png' and '关键词.png' to the working directory.
    Empty cells are skipped.

    Args:
        datafile: Path of the spreadsheet to read.
    """
    data = pd.read_excel(datafile)

    titles = _column_texts(data['标题'])
    intros = _column_texts(data['原标题'])
    keyword_cells = _column_texts(data['关键词'])

    # Word cloud of the titles.
    _render_cloud(_segment(titles), "fas fa-hand-point-left", '标题.png')

    # Word cloud of the intros, with the "原标题：" prefix removed first.
    intros = [intro.replace("原标题：", "") for intro in intros]
    _render_cloud(_segment(intros), "fas fa-hand-point-right", '原标题.png')

    # Word cloud of the keywords. Each cell holds comma-separated keywords
    # that are used as-is, without jieba segmentation.
    keywords = []
    for cell in keyword_cells:
        for keyword in cell.split(","):
            keyword = keyword.strip()
            if keyword:
                keywords.append(keyword)
    _render_cloud(" ".join(keywords), "fas fa-hand-point-up", '关键词.png')

# Script entry point: render the word clouds. drawpic() reads
# '新闻数据-李运辰.xls', the file get_data() saves; get_data() is not called
# here, so that file must already exist when this line runs.
drawpic()
